{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import json\n",
    "from py2neo import Graph, Node\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_csv(\"csv_path\") # Path to CSV file for creating knowledge graph"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Define entity (Nodes)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "diseases = []\n",
    "for each in df['Disease Name']:\n",
    "    diseases.extend(each.split(','))\n",
    "diseases = set(diseases)\n",
    "diseases"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "symptoms = []\n",
    "for each in df['Symptom']:\n",
    "    symptoms.extend(each.split(','))\n",
    "symptoms = set(symptoms)\n",
    "symptoms"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pathogenesis = []\n",
    "for each in df['Pathogenesis']:\n",
    "    pathogenesis.extend(each.split(','))\n",
    "pathogenesis = set(pathogenesis)\n",
    "pathogenesis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "therapeutic_principles = []\n",
    "for each in df['Treatment Principle']:\n",
    "    therapeutic_principles.extend(each.split(','))\n",
    "therapeutic_principles = set(therapeutic_principles)\n",
    "therapeutic_principles"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "medicines = []\n",
    "for each in df['Medication']:\n",
    "    medicines.extend(each.split(','))\n",
    "medicines = set(medicines)\n",
    "medicines"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Define relationship (Edges)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Define relationship deduplication function\n",
    "def deduplicate(rels_old):\n",
    "    rels_new = []\n",
    "    for each in rels_old:\n",
    "        if each not in rels_new:\n",
    "            rels_new.append(each)\n",
    "    return rels_new"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Relationship: Symptom - Disease Name"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "symptom_disease = []\n",
    "for idx, row in df.iterrows():\n",
    "    for each_disease in row['Disease Name'].split(','):\n",
    "        for each_symptom in row['Symptom'].split(','):\n",
    "            symptom_disease.append([each_symptom, each_disease])\n",
    "symptom_disease = deduplicate(symptom_disease)\n",
    "symptom_disease"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Relationship：Pathogenesis - Symptom"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pathogenesis_symptom = []\n",
    "for idx, row in df.iterrows():\n",
    "    for each_symptom in row['Symptom'].split(','):\n",
    "        for each_pathogenesis in row['Pathogenesis'].split(','):\n",
    "            pathogenesis_symptom.append([each_pathogenesis, each_symptom])\n",
    "pathogenesis_symptom = deduplicate(pathogenesis_symptom)\n",
    "pathogenesis_symptom"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Relationship：Pathogenesis - Treatment Principle"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pathogenesis_therapeutic_principle = []\n",
    "for idx, row in df.iterrows():\n",
    "    for each_therapeutic_principle in row['Treatment Principle'].split(','):\n",
    "        for each_pathogenesis in row['Pathogenesis'].split(','):\n",
    "            pathogenesis_therapeutic_principle.append([each_pathogenesis, each_therapeutic_principle])\n",
    "pathogenesis_therapeutic_principle = deduplicate(pathogenesis_therapeutic_principle)\n",
    "pathogenesis_therapeutic_principle"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Relationship：Treatment Principle - Medication"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "therapeutic_principle_medicine = []\n",
    "for idx, row in df.iterrows():\n",
    "    try:\n",
    "        for each_medicine in row['Medication'].split(','):\n",
    "            for each_therapeutic_principle in row['Treatment Principle'].split(','):\n",
    "                therapeutic_principle_medicine.append([each_therapeutic_principle, each_medicine])\n",
    "    except:\n",
    "        pass\n",
    "therapeutic_principle_medicine = deduplicate(therapeutic_principle_medicine)\n",
    "therapeutic_principle_medicine"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Relationship：Symptom - Treatment Principle"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "symptom_therapeutic_principle = []\n",
    "for idx, row in df.iterrows():\n",
    "    try:\n",
    "        for each_symptom in row['Symptom'].split(','):\n",
    "            for each_therapeutic_principle in row['Treatment Principle'].split(','):\n",
    "                symptom_therapeutic_principle.append([each_symptom, each_therapeutic_principle])\n",
    "    except:\n",
    "        pass\n",
    "symptom_therapeutic_principle = deduplicate(symptom_therapeutic_principle)\n",
    "symptom_therapeutic_principle"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Relationship：Medication - Pathogenesis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "medicine_pathogenesis = []\n",
    "for idx, row in df.iterrows():\n",
    "    try:\n",
    "        for each_medicine in row['Medication'].split(','):\n",
    "            for each_pathogenesis in row['Pathogenesis'].split(','):\n",
    "                medicine_pathogenesis.append([each_medicine, each_pathogenesis])\n",
    "    except:\n",
    "        pass\n",
    "medicine_pathogenesis = deduplicate(medicine_pathogenesis)\n",
    "medicine_pathogenesis"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Relationship：Medication - Symptom"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "medicine_symptom = []\n",
    "for idx, row in df.iterrows():\n",
    "    try:\n",
    "        for each_medicine in row['Medication'].split(','):\n",
    "            for each_symptom in row['Symptom'].split(','):\n",
    "                medicine_symptom.append([each_medicine, each_symptom])\n",
    "    except:\n",
    "        pass\n",
    "medicine_symptom = deduplicate(medicine_symptom)\n",
    "medicine_symptom"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Connect graph database"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Note that the username here is the neo4j global username, not the name of the DBMS or database\n",
    "g = Graph('neo4j_url', auth=('neo4j', 'password'), name='neo4j')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Create knowledge graph entities (nodes)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Note:This code block is used to delete all entities and relationships\n",
    "# cypher = 'MATCH (n) DETACH DELETE n'\n",
    "# g.run(cypher)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "for each in diseases:\n",
    "    node = Node('Disease Name', name=each)\n",
    "    g.create(node)\n",
    "    print('Create the entity {}'.format(each))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "for each in medicines:\n",
    "    node = Node('Medication', name=each)\n",
    "    g.create(node)\n",
    "    print('Create the entity {}'.format(each))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "for each in symptoms:\n",
    "    node = Node('Symptom', name=each)\n",
    "    g.create(node)\n",
    "    print('Create the entity {}'.format(each))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "for each in therapeutic_principles:\n",
    "    node = Node('Pathogenesis', name=each)\n",
    "    g.create(node)\n",
    "    print('Create the entity {}'.format(each))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "for each in pathogenesis:\n",
    "    node = Node('Treatment Principle', name=each)\n",
    "    g.create(node)\n",
    "    print('Create the entity {}'.format(each))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Create knowledge graph relationships (edges)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Define the create relationship function\n",
    "def create_relationship(start_node, end_node, edges, rel_type, rel_name):\n",
    "    for edge in edges:\n",
    "        p = edge[0]\n",
    "        q = edge[1]\n",
    "        # Cypher statement for creating relationships\n",
    "        query = \"match(p:%s),(q:%s) where p.name='%s' and q.name='%s' create (p)-[rel:%s{name:'%s'}]->(q)\" % (start_node, end_node, p, q, rel_type, rel_name)\n",
    "        try:\n",
    "            g.run(query) # Run Cypher statement\n",
    "            print('Create the relationship {}-{}->{}'.format(p, rel_type, q))\n",
    "        except Exception as e:\n",
    "            print(e)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Symptom-Belongs to->Disease Name"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "create_relationship('Symptom', 'Disease Name', symptom_disease, 'Belongs to', 'Symptom belongs to Disease Name')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Pathogenesis-Causes->Symptom"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "create_relationship('Pathogenesis', 'Symptom', pathogenesis_symptom, 'Causes', 'Pathogenesis causes Symptom')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Pathogenesis-Follows->Treatment Principle"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "create_relationship('Pathogenesis', 'Treatment Principle', pathogenesis_therapeutic_principle, 'Follows', 'Pathogenesis follows Treatment Principle')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Treatment Principle-Recommends->Medication"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "create_relationship('Treatment Principle', 'Medication', therapeutic_principle_medicine, 'Recommends', 'Treatment Principle recommends Medication')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Symptom-Applies->Treatment Principle"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "create_relationship('Symptom', 'Treatment Principle', symptom_therapeutic_principle, 'Applies', 'Symptom applies Treatment Principle')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Medication-Treats->Pathogenesis"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "create_relationship('Medication', 'Pathogenesis', medicine_pathogenesis, 'Treats', 'Medication treats Pathogenesis')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##### Medication-Treats->Symptom"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "create_relationship('Medication', 'Symptom', medicine_symptom, 'Treats', 'Medication treats Symptom')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
